# FILE: LiteralDataFilter.py
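# DESC: DataFilter subclass that reduces text to letters, apostrophes,
#       periods and spaces, and drops whitespace-preceded lowercase words.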

from DataFilter import DataFilter

import re

class LiteralDataFilter(DataFilter):
	"""DataFilter subclass that strips text down to its non-lowercase words.

	``filter`` removes punctuation/digits and lowercase words that follow
	whitespace; ``expandAcronyms`` currently only replaces periods with
	spaces.
	"""

	def filter(self, doc):
		"""Return ``doc`` with non-literal characters and lowercase words removed.

		The text is processed in three passes:

		1. Every character other than an ASCII letter, apostrophe, period
		   or space is replaced by a space (this includes newlines, tabs
		   and digits).
		2. Runs of two or more whitespace characters collapse to one space.
		3. Each whitespace character followed by a run of lowercase letters
		   (optionally continued by an apostrophe and more lowercase
		   letters, e.g. " don't") is deleted together with that run.

		Example: "Hello, world! Don't stop" becomes "Hello Don't".

		Parameters:
			doc -- the text to filter (a string).

		Returns:
			The filtered string.
		"""
		# Raw string literals are used for every pattern so that regex
		# escapes such as \s are not interpreted as (invalid) string
		# escapes, which newer Python versions warn about.
		sdoc = re.sub(r"[^a-zA-Z'. ]", " ", doc)
		sdoc = re.sub(r"\s{2,}", " ", sdoc)
		# NOTE: a match requires a preceding whitespace character, so a
		# lowercase word at the very start of the text is kept, and only
		# the lowercase prefix of a mixed-case word (" iPhone" -> "Phone")
		# is removed. This mirrors the long-standing behaviour.
		sdoc = re.sub(r"\s[a-z]+(?:'[a-z]+)?", "", sdoc)
		return sdoc

	# TODO: read acronym expansions from oreus.conf
	def expandAcronyms(self, doc):
		"""Return ``doc`` with every period replaced by a single space.

		No real acronym expansion is performed yet (see the TODO above);
		for example "U.S.A." becomes "U S A ".

		Parameters:
			doc -- the text to process (a string).

		Returns:
			The string with periods turned into spaces.
		"""
		# The period is escaped so it matches a literal "." and not any
		# character.
		sdoc = re.sub(r"\.", " ", doc)
		return sdoc
